import numpy as np
import sys
import os
import time


def main(max_rows=250000):
    """Write a truncated copy of each selected text data file.

    The files to process are chosen from ``sys.argv``:

    * no arguments: a built-in default directory and file name;
    * one argument (``DIR``): every regular file directly inside ``DIR``;
    * two arguments (``DIR FILE``): only ``FILE`` inside ``DIR``.

    For every input file the first ``max_rows`` rows are parsed with
    ``np.loadtxt`` and written with integer formatting to
    ``<stem>_2w.txt`` in the same directory, where ``<stem>`` is the input
    name without its final extension.

    Args:
        max_rows: Maximum number of leading rows to keep from each file.
            ``None`` keeps every row.

    Raises:
        ValueError: If more than two command-line arguments are given.
    """
    argc = len(sys.argv)
    if argc == 1:
        filedir = "/home/zy/gitee_repos/db/dataset"
        filenames = ["amazon_reviews.txt"]
        print("execute file %s" % filenames)
    elif argc == 2:
        filedir = sys.argv[1]
        print("execute files in dir %s" % filedir)
        # Skip sub-directories: np.loadtxt cannot read them and would abort
        # the whole run.
        filenames = [
            name
            for name in os.listdir(filedir)
            if os.path.isfile(os.path.join(filedir, name))
        ]
    elif argc == 3:
        filedir = sys.argv[1]
        filenames = [sys.argv[2]]
        print("execute file %s" % filenames)
    else:
        raise ValueError(
            "expected 0, 1 (DIR) or 2 (DIR FILE) arguments, got %d" % (argc - 1)
        )

    for filename in filenames:
        print("executing %s" % filename)
        # splitext strips only the last extension, so "a.b.txt" maps to
        # "a.b_2w.txt" instead of being cut at the first dot.
        stem, _ext = os.path.splitext(filename)
        file_path = os.path.join(filedir, filename)
        file_path_new = os.path.join(filedir, stem + "_2w.txt")

        # max_rows stops parsing once enough rows are read instead of loading
        # the whole file; ndmin=2 keeps a single-row file as one row so that
        # savetxt does not write it out as a column.
        # NOTE(review): on NumPy < 1.23 blank/comment lines reportedly count
        # toward max_rows - confirm if inputs contain such lines.
        rows = np.loadtxt(file_path, max_rows=max_rows, ndmin=2)
        np.savetxt(file_path_new, rows, fmt="%d")


if __name__ == "__main__":
    # Script entry point: log the start time, run main(), then report how
    # many minutes the run took.
    # localtime() is passed to strftime() so that only the formatted
    # timestamp is printed.
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    print("supposed to finish in about 20 mins")
    # perf_counter() is unaffected by system clock adjustments, which makes
    # it the appropriate clock for measuring an elapsed interval.
    tic = time.perf_counter()
    main()
    toc = time.perf_counter()
    time_elapse = (toc - tic) / 60.0
    print("time elapse %.2f min" % time_elapse)
